Methodologies for Spatially Interpolating Climate Adaptation Options from Econometrics Methods

Author

Maxwell Mkondiwa

1 Introduction

This notebook provides machine learning and spatial methods for assessing the impacts of climate on yields and identifying adaptation options in a spatially explicit manner. The goal is to generate spatially differentiated (gridded) estimates of the effects of weather extremes and adaptation options.

We use publicly available datasets in India to demonstrate these models.

2 Important geospatial R packages: Terra, geodata,sf, sp

library(geodata)

India=gadm(country="IND", level=2, path="shp")
plot(India)

India_aoi=subset(India,India$NAME_1=="Bihar"|India$NAME_2%in%c("Ballia","Chandauli","Deoria","Ghazipur","Kushinagar","Maharajganj","Mau","Siddharth Nagar","Gorakhpur"))

plot(India_aoi)

plot(India_aoi, add=TRUE)

library(sf)

India_aoi_sf=st_as_sf(India_aoi)
library(mapview)

mapview(India_aoi_sf)
# Dissolve the district polygons to form new polygon of Bihar and EUP
library(sf)
India_aoi_sf_dis=st_union(India_aoi_sf)

3 Estimation Approaches

3.1 Causal Random Forest Model to Get Individual Treatment Effects

3.1.1 Sowing dates before Nov 20 vs after

library(grf)
library(policytree)

library(rio)
LDS=import("LDS_wheat_public_cleaned.csv")
monthlytemp=import("monthly_temp.csv")

LDS=cbind(LDS,monthlytemp)

LDSestim_sow=subset(LDS, select=c("Sowing_Date_Early","Sowing_Date_Schedule_rating_num","Sowing_Date_Schedule","L.tonPerHectare","I.q5505_weedSeverity_num","I.q5509_diseaseSeverity_num","I.q5506_insectSeverity_num","I.q5502_droughtSeverity_num",                                       "Nperha","P2O5perha","variety_type_NMWV","G.q5305_irrigTimes","A.q111_fGenderdum","Weedmanaged","temp","precip","wc2.1_30s_elev","M.q708_marketDistance","nitrogen_0.5cm","sand_0.5cm", "soc_5.15cm","O.largestPlotGPS.Latitude","O.largestPlotGPS.Longitude","april_mean_temp","march_mean_temp","feb_mean_temp"))

library(tidyr)
LDSestim_sow=LDSestim_sow %>% drop_na()


Y_cf_sowing=as.vector(LDSestim_sow$L.tonPerHectare)

## Causal random forest -----------------

X_cf_sowing=subset(LDSestim_sow, select=c("I.q5505_weedSeverity_num","I.q5509_diseaseSeverity_num","I.q5506_insectSeverity_num",
                                                  "Nperha","P2O5perha","variety_type_NMWV","G.q5305_irrigTimes","A.q111_fGenderdum","Weedmanaged","temp","precip","wc2.1_30s_elev",
                                                       "M.q708_marketDistance","nitrogen_0.5cm","sand_0.5cm", "soc_5.15cm","O.largestPlotGPS.Latitude","O.largestPlotGPS.Longitude","april_mean_temp","march_mean_temp","feb_mean_temp"))


W_cf_sowing <- as.factor(LDSestim_sow$Sowing_Date_Schedule)

# Probability random forest to create weights
W.multi_sowing.forest <- probability_forest(X_cf_sowing, W_cf_sowing,
  equalize.cluster.weights = FALSE,
  seed = 2
)
W.hat.multi.all_sowing <- predict(W.multi_sowing.forest, estimate.variance = TRUE)$predictions


# Regression forest to get expected responses 
Y.multi_sowing.forest <- regression_forest(X_cf_sowing, Y_cf_sowing,
  equalize.cluster.weights = FALSE,
  seed = 2
)

print(Y.multi_sowing.forest)
GRF forest object of type regression_forest 
Number of trees: 2000 
Number of training samples: 7562 
Variable importance: 
    1     2     3     4     5     6     7     8     9    10    11    12    13 
0.000 0.000 0.001 0.022 0.012 0.647 0.195 0.000 0.066 0.001 0.001 0.005 0.001 
   14    15    16    17    18    19    20    21 
0.001 0.004 0.002 0.024 0.015 0.001 0.001 0.002 
varimp.multi_sowing <- variable_importance(Y.multi_sowing.forest)
Y.hat.multi.all_sowing <- predict(Y.multi_sowing.forest, estimate.variance = TRUE)$predictions

# Fit multi-arm causal RF model
multi_sowing.forest <- multi_arm_causal_forest(X = X_cf_sowing, Y = Y_cf_sowing, W = W_cf_sowing ,W.hat=W.hat.multi.all_sowing,Y.hat=Y.hat.multi.all_sowing,seed=2) 

varimp.multi_sowing_cf <- variable_importance(multi_sowing.forest)

# Average treatment effects
multi_sowing_ate=average_treatment_effect(multi_sowing.forest, method="AIPW")
multi_sowing_ate
                      estimate    std.err            contrast outcome
T2_20Nov - T1_10Nov -0.1448949 0.03820657 T2_20Nov - T1_10Nov     Y.1
T3_30Nov - T1_10Nov -0.2882908 0.03637854 T3_30Nov - T1_10Nov     Y.1
T4_15Dec - T1_10Nov -0.4795005 0.03838067 T4_15Dec - T1_10Nov     Y.1
T5_16Dec - T1_10Nov -0.7188928 0.04067701 T5_16Dec - T1_10Nov     Y.1
 # Calibration check: Multi-arm causal RF does not yet calibration check
 ## We use binary causal RF to do that
 
W_cf_sowing_binary=as.vector(LDSestim_sow$Sowing_Date_Early) 

# Probability random forest to create weights
W.multi_sowing.forest_binary <- regression_forest(X_cf_sowing, W_cf_sowing_binary,
  equalize.cluster.weights = FALSE,
  seed = 2
)
W.hat.multi.all_sowing_binary <- predict(W.multi_sowing.forest_binary, estimate.variance = TRUE)$predictions


# Regression forest to get expected responses 
Y.multi_sowing.forest_binary <- regression_forest(X_cf_sowing, Y_cf_sowing,
  equalize.cluster.weights = FALSE,
  seed = 2
)

print(Y.multi_sowing.forest_binary)
GRF forest object of type regression_forest 
Number of trees: 2000 
Number of training samples: 7562 
Variable importance: 
    1     2     3     4     5     6     7     8     9    10    11    12    13 
0.000 0.000 0.001 0.022 0.012 0.647 0.195 0.000 0.066 0.001 0.001 0.005 0.001 
   14    15    16    17    18    19    20    21 
0.001 0.004 0.002 0.024 0.015 0.001 0.001 0.002 
varimp.multi_sowing_binary <- variable_importance(Y.multi_sowing.forest_binary)
Y.hat.multi.all_sowing_binary <- predict(Y.multi_sowing.forest_binary, estimate.variance = TRUE)$predictions

# Fit binary causal RF model
multi_sowing.forest_binary <- causal_forest(X = X_cf_sowing, Y = Y_cf_sowing, W = W_cf_sowing_binary ,W.hat=W.hat.multi.all_sowing_binary,Y.hat=Y.hat.multi.all_sowing_binary,seed=2) 

varimp.multi_sowing_cf_binary <- variable_importance(multi_sowing.forest_binary)

# Average treatment effects
multi_sowing_ate_binary=average_treatment_effect(multi_sowing.forest_binary,target.sample = "overlap")
multi_sowing_ate_binary
  estimate    std.err 
0.22488474 0.01897671 
multi_sowing_binary_calibration=test_calibration(multi_sowing.forest_binary)
multi_sowing_binary_calibration

Best linear fit using forest predictions (on held-out data)
as well as the mean forest prediction as regressors, along
with one-sided heteroskedasticity-robust (HC3) SEs:

                               Estimate Std. Error t value    Pr(>t)    
mean.forest.prediction         1.069524   0.084024 12.7288 < 2.2e-16 ***
differential.forest.prediction 1.728729   0.280558  6.1617 3.782e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tau.hat_sowing=predict(multi_sowing.forest_binary , target.sample = "all",estimate.variance=TRUE)
summary(tau.hat_sowing$predict)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-0.01807  0.18037  0.23215  0.22897  0.27845  0.42748 

3.1.2 Understanding Mechanisms

We can use the model to understand how farmers who used different amounts of fertilizer or inputs benefit from early sowing of wheat. In addition, we can investigate if farmers who are above the heat stress threshold gain from early sowing.

X_cf_sowingtau=data.frame(X_cf_sowing,tau.hat_sowing)

library(ggplot2)
ggplot(X_cf_sowingtau,
       aes(x = predictions)) +
  geom_histogram() +
  xlab('CATE') +
  geom_vline(xintercept = 0, col = 'black', linetype = 'dashed') +
  geom_vline(xintercept = multi_sowing_ate_binary["estimate"], col = 'red') +
  theme_bw()

# nitrogen
library(ggplot2)
sowingCATENitrogen=ggplot(X_cf_sowingtau,aes(Nperha,predictions))+
  geom_smooth(method="loess",formula=y~x,col="darkblue")+
  labs(x="Applied nitrogen (kg/ha)",y="Sowing treatment effect")
previous_theme <- theme_set(theme_bw())
sowingCATENitrogen

sowingCATEtemp=ggplot(X_cf_sowingtau,aes(temp,predictions))+
  geom_smooth(method="loess",formula=y~x,col="darkblue")+
  labs(x="Temperature",y="Sowing treatment effect")
previous_theme <- theme_set(theme_bw())
sowingCATEtemp

sowingCATEtempFeb=ggplot(X_cf_sowingtau,aes(feb_mean_temp,predictions))+
  geom_smooth(method="loess",formula=y~x,col="darkblue")+
  labs(x="February temperature",y="Sowing treatment effect")
previous_theme <- theme_set(theme_bw())
sowingCATEtempFeb

sowingCATEtempMarch=ggplot(X_cf_sowingtau,aes(march_mean_temp,predictions))+
  geom_smooth(method="loess",formula=y~x,col="darkblue")+
  labs(x="March temperature",y="Sowing treatment effect")
previous_theme <- theme_set(theme_bw())
sowingCATEtempMarch

sowingCATEtempAp=ggplot(X_cf_sowingtau,aes(april_mean_temp,predictions))+
  geom_smooth(method="loess",formula=y~x,col="darkblue")+
  labs(x="April temperature",y="Sowing treatment effect")
previous_theme <- theme_set(theme_bw())
sowingCATEtempAp

sowingCATEprecip=ggplot(X_cf_sowingtau,aes(precip,predictions))+
  geom_smooth(method="loess",formula=y~x,col="darkblue")+
  labs(x="Precipitation",y="Sowing treatment effect")
previous_theme <- theme_set(theme_bw())
sowingCATEprecip

# Mapping
library(sp)
X_cf_sowingtau_sp= SpatialPointsDataFrame(cbind(X_cf_sowingtau$O.largestPlotGPS.Longitude,X_cf_sowingtau$O.largestPlotGPS.Latitude),data=X_cf_sowingtau,proj4string=CRS("+proj=longlat +datum=WGS84"))


library(mapview)
mapviewOptions(fgb = FALSE)
tau.hat_sowing_predictionsmapview=mapview(X_cf_sowingtau_sp,zcol="predictions",layer.name="Early sowing yield gain (t/ha)")
tau.hat_sowing_predictionsmapview
# Based on classifications of treatment gains

library(gtools)
X_cf_sowingtau$sowing_treat_bin <- quantcut(X_cf_sowingtau$predictions)
table(X_cf_sowingtau$sowing_treat_bin)

[-0.0181,0.18]   (0.18,0.232]  (0.232,0.278]  (0.278,0.427] 
          1891           1890           1890           1891 
#Represent it
p <- X_cf_sowingtau %>%
  ggplot( aes(x=temp, fill=sowing_treat_bin)) +
    geom_boxplot()
p

The above analysis however shows only the benefits to those farmers who experienced that level of stress. What about those who didn’t experience it but could have experienced it? For a counterfactual analysis of how each farmer would have gained or lost if they had a terminal stress and had sown early assuming the same levels of use other inputs, we predict the treatment effect under the assumption that all variables are the same except for the maximum temperature which is fixed at a value above 31 (e.g., 32).

3.1.3 Early sowing gains in heat stress of 32

X_cf_sowing_pred=X_cf_sowing
X_cf_sowing_pred$temp=32

tau.hat_sowing_heat=predict(multi_sowing.forest_binary,X_cf_sowing_pred,estimate.variance=TRUE)
summary(tau.hat_sowing_heat$predict)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.02099 0.19525 0.24553 0.24207 0.28682 0.44038 
X_cf_sowingtau=data.frame(X_cf_sowing,tau.hat_sowing)

3.2 Alternative estimates using Bayesian geoadditive model

library(bamlss)
set.seed(111)
f <- yield_kgperha ~ s(march_mean_temp)+s(april_mean_temp)+ s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early)

## estimate model.
b <- bamlss(f, data = LDS)
AICc 136222.5 logPost -68192.2 logLik -68104.2 edf 6.9982 eps 0.2773 iteration   1
AICc 129058.6 logPost -64582.2 logLik -64522.4 edf 6.8422 eps 0.1597 iteration   2
AICc 125338.7 logPost -62732.3 logLik -62662.8 edf 6.5814 eps 0.0622 iteration   3
AICc 124232.9 logPost -62196.4 logLik -62110.1 edf 6.3051 eps 0.0266 iteration   4
AICc 124068.8 logPost -62150.2 logLik -62026.9 edf 7.4363 eps 0.0093 iteration   5
AICc 123968.8 logPost -62136.6 logLik -61972.0 edf 12.348 eps 0.0058 iteration   6
AICc 123935.6 logPost -62295.0 logLik -61945.8 edf 21.889 eps 0.0035 iteration   7
AICc 123923.1 logPost -63801.1 logLik -61931.1 edf 30.272 eps 0.0021 iteration   8
AICc 123923.0 logPost -63802.5 logLik -61930.8 edf 30.588 eps 0.0001 iteration   9
AICc 123923.0 logPost -63802.5 logLik -61930.8 edf 30.588 eps 0.0001 iteration   9
elapsed time:  2.91sec
Starting the sampler...

|                    |   0% 34.51sec
|*                   |   5% 30.59sec  1.61sec
|**                  |  10% 30.51sec  3.39sec
|***                 |  15% 29.35sec  5.18sec
|****                |  20% 28.40sec  7.10sec
|*****               |  25% 27.15sec  9.05sec
|******              |  30% 25.64sec 10.99sec
|*******             |  35% 24.12sec 12.99sec
|********            |  40% 22.37sec 14.91sec
|*********           |  45% 20.59sec 16.85sec
|**********          |  50% 19.04sec 19.04sec
|***********         |  55% 17.25sec 21.08sec
|************        |  60% 15.38sec 23.07sec
|*************       |  65% 13.54sec 25.14sec
|**************      |  70% 11.64sec 27.16sec
|***************     |  75%  9.72sec 29.16sec
|****************    |  80%  7.80sec 31.19sec
|*****************   |  85%  5.85sec 33.14sec
|******************  |  90%  3.88sec 34.91sec
|******************* |  95%  1.93sec 36.71sec
|********************| 100%  0.00sec 38.55sec
summary(b)

Call:
bamlss(formula = f, data = LDS)
---
Family: gaussian 
Link function: mu = identity, sigma = log
*---
Formula mu:
---
yield_kgperha ~ s(march_mean_temp) + s(april_mean_temp) + s(O.largestPlotGPS.Longitude, 
    O.largestPlotGPS.Latitude, by = Sowing_Date_Early)
-
Parametric coefficients:
            Mean 2.5%  50% 97.5% parameters
(Intercept) 2832 2811 2831  2853       2828
-
Acceptance probability:
        Mean   2.5%    50% 97.5%
alpha 0.9857 0.9342 0.9986     1
-
Smooth terms:
                                                                                        Mean
s(march_mean_temp).tau21                                                           1.414e+06
s(march_mean_temp).alpha                                                           1.000e+00
s(march_mean_temp).edf                                                             5.913e+00
s(april_mean_temp).tau21                                                           1.028e+06
s(april_mean_temp).alpha                                                           1.000e+00
s(april_mean_temp).edf                                                             5.670e+00
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).tau21 1.441e+06
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).alpha 1.000e+00
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).edf   2.178e+01
                                                                                        2.5%
s(march_mean_temp).tau21                                                           2.327e+05
s(march_mean_temp).alpha                                                           1.000e+00
s(march_mean_temp).edf                                                             4.373e+00
s(april_mean_temp).tau21                                                           4.449e+01
s(april_mean_temp).alpha                                                           1.000e+00
s(april_mean_temp).edf                                                             1.010e+00
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).tau21 4.979e+05
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).alpha 1.000e+00
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).edf   1.764e+01
                                                                                         50%
s(march_mean_temp).tau21                                                           9.711e+05
s(march_mean_temp).alpha                                                           1.000e+00
s(march_mean_temp).edf                                                             5.898e+00
s(april_mean_temp).tau21                                                           7.063e+05
s(april_mean_temp).alpha                                                           1.000e+00
s(april_mean_temp).edf                                                             5.848e+00
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).tau21 1.268e+06
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).alpha 1.000e+00
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).edf   2.179e+01
                                                                                       97.5%
s(march_mean_temp).tau21                                                           5.039e+06
s(march_mean_temp).alpha                                                           1.000e+00
s(march_mean_temp).edf                                                             7.532e+00
s(april_mean_temp).tau21                                                           4.192e+06
s(april_mean_temp).alpha                                                           1.000e+00
s(april_mean_temp).edf                                                             7.702e+00
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).tau21 3.161e+06
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).alpha 1.000e+00
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).edf   2.533e+01
                                                                                   parameters
s(march_mean_temp).tau21                                                            1.100e-02
s(march_mean_temp).alpha                                                                   NA
s(march_mean_temp).edf                                                              9.270e-01
s(april_mean_temp).tau21                                                            0.000e+00
s(april_mean_temp).alpha                                                                   NA
s(april_mean_temp).edf                                                              0.000e+00
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).tau21  7.931e+06
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).alpha         NA
s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude,by=Sowing_Date_Early).edf    2.766e+01
---
Formula sigma:
---
sigma ~ 1
-
Parametric coefficients:
             Mean  2.5%   50% 97.5% parameters
(Intercept) 6.677 6.662 6.677 6.693      6.679
-
Acceptance probability:
        Mean   2.5%    50% 97.5%
alpha 0.9949 0.9563 1.0000     1
---
Sampler summary:
-
DIC = 123879.4 logLik = -61921.05 pd = 37.2835
runtime = 38.86
---
Optimizer summary:
-
AICc = 123923.1 edf = 30.5888 logLik = -61930.83
logPost = -63802.5 nobs = 7648 runtime = 2.91
## Plot estimated effects.
plot(b)

# ## Predict for each latitude and longitude
pred <- expand.grid(O.largestPlotGPS.Longitude = seq(82, 89, length = 100),O.largestPlotGPS.Latitude = seq(24,28, length = 100))

pred$Sowing_Date_Early=1

pred$march_mean_temp=mean(LDS$march_mean_temp)
pred$april_mean_temp=mean(LDS$april_mean_temp)

pred2 <- expand.grid(O.largestPlotGPS.Longitude = seq(82, 89, length = 100),O.largestPlotGPS.Latitude = seq(24,28, length = 100))

pred2$Sowing_Date_Early=0

pred2$march_mean_temp=mean(LDS$march_mean_temp)
pred2$april_mean_temp=mean(LDS$april_mean_temp)

tau_hat <- predict(b,newdata=pred)
tau_hat2 <- predict(b,newdata=pred2)


tau_hat=as.data.frame(tau_hat)
tau_hat2=as.data.frame(tau_hat2)

names(tau_hat)[1:2]=c("mu_1","sigma_1")
names(tau_hat2)[1:2]=c("mu_2","sigma_2")

pred_tau_hat=cbind(pred,tau_hat,tau_hat2)

pred_tau_hat=as.data.frame(pred_tau_hat)

pred_tau_hat$sowing_yield_gain=pred_tau_hat$mu_1-pred_tau_hat$mu_2


# library(terra)
# 
pred_tau_hat$Sowing_Date_Early=NULL
pred_tau_hat$march_mean_temp=NULL
pred_tau_hat$april_mean_temp=NULL
pred_tau_hat$mu_1=NULL
pred_tau_hat$sigma_1=NULL
pred_tau_hat$mu_2=NULL
pred_tau_hat$sigma_2=NULL
pred_tau_hat$sowing_yield_gain=pred_tau_hat$sowing_yield_gain/1000
 myras <- rast(pred_tau_hat, type="xyz")
plot(myras)

 library(raster)
 myras2=raster(myras)
 
library(sf)
India_aoi_sf_dis_sp=as_Spatial(India_aoi_sf_dis)
myras2=mask(myras2,India_aoi_sf_dis_sp)
plot(myras2)

4 Interpolation for point-geocoded data

4.1 Gridded data input variables

The first strategy is to translate all variables to the grid. This involves interpolation across space and using new variable names. In this case, instead of gender being a dummy, you use a proportion of female or male after interpolation.

4.1.1 Proximity polygons

library(rio)
LDS=import("LDS_wheat_public_cleaned.csv")

library(sp)
LDS_sp=SpatialPointsDataFrame(cbind(LDS$O.largestPlotGPS.Longitude,LDS$O.largestPlotGPS.Latitude),data=LDS,proj4string=CRS("+proj=longlat +datum=WGS84"))

library(terra)
LDS_v=vect(LDS_sp)
if (!require("rspat")) remotes::install_github('rspatial/rspat')

library(rspat)
v <- voronoi(LDS_v)
plot(v)
points(LDS_v)

v_india_aoi <- crop(v,India_aoi)
plot(v_india_aoi, "yield_kgperha")

# r <- rast(India_aoi, res=10000)
# vr <- terra::rasterize(v_india_aoi, r, "yield_kgperha")
# plot(vr)

4.1.2 Nearest-neigbor

#r <- rast(India_aoi, res=10000)
# gs <- gstat(formula=yield_kgperha~1, locations=~x+y, data=LDS_sp, nmax=5, set=list(idp = 0))
# nn <- interpolate(r, gs, debug.level=0)
# nnmsk <- mask(nn, vr)
# plot(nnmsk, 1)

4.1.3 Inverse distance weighted

# library(gstat)
# gs <- gstat(formula=yield_kgperha~1, locations=~x+y, data=d)
# idw <- interpolate(r, gs, debug.level=0)
# idwr <- mask(idw, vr)
# plot(idwr, 1)
# 
# 

4.2 Model based kriging

4.3 Causal ML using gridded variables

4.4 Model-based predictions

4.4.1 Random forest and raster prediction

This approach follows notes from reago website by Robert Hjimans (https://reagro.org/cases/croptrials.html).

4.4.1.1 Interpolate RF model

library(rio)
LDS=import("LDS_wheat_public_cleaned.csv")
table(LDS$A.q103_district,LDS$A.q102_state)
                
                 Bihar UttarPradesh
  Arah             208            0
  Araria           117            0
  Arwal            181            0
  Aurangabad       194            0
  Balia              0          216
  Banka            176            0
  Begusarai        213            0
  Bhagalpur        207            0
  Buxar            207            0
  Chandauli          0          208
  Deoria             0          209
  EastChamparan    205            0
  Gaya             193            0
  Gazipur            0          210
  Gopalganj        210            0
  Gorakhpur          0          210
  Jehanabad        189            0
  Kaimur           204            0
  Katihar          115            0
  Khagaria         205            0
  Kushinagar         0          195
  Lakhisarai       195            0
  Madhepura        169            0
  Maharajganj        0          210
  Mau                0          204
  Munger           210            0
  Muzaffarpur      204            0
  Nalanda          196            0
  Patna            203            0
  Purnia             8            0
  Rohtas           196            0
  Saharsa          163            0
  Samastipur       202            0
  Saran            209            0
  Sheohar          209            0
  Siddharthnagar     0          193
  Siwan            198            0
  Supaul           206            0
  Vaishali         196            0
  WestChamparan    205            0
plot(LDS$O.largestPlotGPS.Longitude, LDS$O.largestPlotGPS.Latitude, col="red", pch=20)

# Random Forest Estimation 
library(randomForest)

RF_model <- randomForest(yield_kgperha ~ O.largestPlotGPS.Longitude + O.largestPlotGPS.Latitude, data=LDS)

varImpPlot(RF_model)

RF_model_pred = predict(RF_model)
plot(LDS$yield_kgperha, RF_model_pred)
abline(0,1)

# Raster prediction

## Create grid with extent
library(raster)

e <- extent(c(min(LDS$O.largestPlotGPS.Longitude)-2,max(LDS$O.largestPlotGPS.Longitude)+2,min(LDS$O.largestPlotGPS.Latitude)-2,max(LDS$O.largestPlotGPS.Latitude)+2))

aoi <- raster(ext=e, res=1/6)

# Interpolate
pp <- interpolate(aoi, RF_model, xyNames=c('O.largestPlotGPS.Longitude', 'O.largestPlotGPS.Latitude'))
pp <- mask(pp, India_aoi_sf)
pp <- crop(pp, India_aoi_sf)
plot(pp)

#points(LDS$O.largestPlotGPS.Longitude, LDS$O.largestPlotGPS.Latitude, col="blue")

4.4.1.2 Raster predict from RF model: Kriging Wheat Prices

The spatial prediction -the variables are spatial- function takes two arguments: the prediction variables and the price prediction model. Both the “stats” package(loaded as a randomForest dependency) and the “raster” package have a function called “predict” that can make predictions. Since we are dealing with spatial data, we add a prefix to the function name to ensure the “predict” function in the raster “package” is used.

# library(ncdf4)
# library(raster)
# library(sf)
# library(data.table)
# library(exactextractr)
# library(terra)
# library(rgdal)
# library(geodata)
# 
# # Step 1
# #dir.create("rasters") # Create a directory to put the downloaded raster files
# #population=population(2015,0.5,path="rasters")
# #elevationglobal_geodata=elevation_global(0.5,path="rasters")
# 
# rasterstack <- stack() 
# raster::crs(rasterstack)="+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
# rasterlist <- list.files(path = "rasters",recursive=TRUE, 
#                          pattern = "*.tif$", 
#                          full.names = TRUE) # Character vector of relative filepaths
# 
# 
# for (rasterpath in rasterlist) {
#       rasterfile <- raster(rasterpath)
#       rasterstack <- addLayer(rasterstack, rasterfile)
#     }
# 
# refrenceraster <- rasterstack[[1]]
# 
# LDS_sp=SpatialPointsDataFrame(cbind(LDS$O.largestPlotGPS.Longitude,LDS$O.largestPlotGPS.Latitude),data=LDS,proj4string=CRS("+proj=longlat +datum=WGS84"))
# 
# 
# India_aoi_sp=as_Spatial(India_aoi_sf)
# 
# # # Step 2
# India_aoi_sp.raster <- rasterize(India_aoi_sp, refrenceraster)
# 
# # Step 3
# latitudes <- xFromCell(India_aoi_sp.raster, 1:length(India_aoi_sp.raster))  
# longitudes <- yFromCell(India_aoi_sp.raster, 1:length(India_aoi_sp.raster))
# 
# # Step 4
# India_aoi_sp.raster.lati <- India_aoi_sp.raster.long <- India_aoi_sp.raster
# values(India_aoi_sp.raster.lati) <- latitudes
# values(India_aoi_sp.raster.long) <- longitudes
# 
# # Step 5
# names(India_aoi_sp.raster.long) <- "Longitude"
# names(India_aoi_sp.raster.lati) <- "Latitude"
# 
# 
# rasterstack <- stack(rasterstack, India_aoi_sp.raster.long, India_aoi_sp.raster.lati)
# 
# # step 6
# library(terra)
# predict.vrbs.r = terra::extract(rasterstack,
#                   LDS_sp,
#                   #buffer=1, # Meters
#                  # buffer=5000, # Meters
#                   #small=TRUE,
#                   fun = mean)
# 
# predict.vrbs.r <- predict.vrbs.r[complete.cases(predict.vrbs.r),]
# 
# RF_model_rast <- randomForest(x=predict.vrbs.r,
#                          y=LDS$L.q607_farmGatePricePerKg)
# 
# raster.prediction <- raster::predict(rasterstack, # Prediction variable rasters
#                                      RF_model_rast # Prediction  model
#                                     )   
# 
# 
# 
# raster.prediction.c=crop(raster.prediction,India_aoi_sp)
# raster.prediction.m=mask(raster.prediction.c,India_aoi_sp)
# 
# # Plot the raster
# library(rasterVis)
# 
# raster.prediction.m_plot=levelplot(raster.prediction.m,par.settings=RdBuTheme())
# raster.prediction.m_plot

4.4.2 Spatial Bayesian Geostatistical Gaussian Process Model [Aka Bayesian Kriging]

If one is interested in calculating other measures other than the predicted value (for example, the probability of exceeding some amount), then a Bayesian gaussian process model is the best alternative in that using Markov Chain Monte Carlo simulations one can use a probabilistic assessment.

### Bayesian kriging 

LDS_small <- LDS[sample(1:nrow(LDS),800),] 


coords=dplyr::select(LDS_small,O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude)
coords=as.matrix(coords)

# The public version of the data has duplicated coordinates
# We need to jitter these because spatial Bayesian kriging requires unique coordinates.
library(geoR)
coords=jitterDupCoords(coords,min=2,max=10)
coords=as.matrix(coords)

# Bayesian models take much time to render. We sample 1000 observations to showcase the approach
library(spBayes)
n.samples=1000

t1 <- Sys.time()

r <-1
n.ltr <- r*(r+1)/2

priors <- list("phi.Unif"=list(rep(1,r), rep(10,r)), "K.IW"=list(r, diag(rep(1,r))), "tau.sq.IG"=c(2, 1))

starting <- list("phi"=rep(3/0.5,r), "A"=rep(1,n.ltr), "tau.sq"=1)

tuning <- list("phi"=rep(0.1,r), "A"=rep(0.01, n.ltr), "tau.sq"=0.01)

cf.sowing.sp <- spBayes::spSVC(yield_kgperha~1, data=LDS_small,coords=coords,
                                  starting= starting,
                                  tuning=tuning,
                                  priors=priors,
                                  cov.model="exponential",n.samples=n.samples,
                                  n.omp.threads=15,svc.cols=c("(Intercept)"))
----------------------------------------
    General model description
----------------------------------------
Model fit with 800 observations.

Number of covariates 1.

Number of space varying covariates 1.

Using the exponential spatial correlation model.

Number of MCMC samples 1000.

Priors and hyperpriors:
    beta flat.
    K IW hyperpriors:
    df: 1.00000
    S:
    1.000   

    phi Unif lower bound hyperpriors:   1.000   
    phi Unif upper bound hyperpriors:   10.000  

    tau.sq IG hyperpriors shape=2.00000 and scale=1.00000

Source compiled with OpenMP, posterior sampling is using 15 thread(s).
-------------------------------------------------
        Sampling
-------------------------------------------------
Sampled: 100 of 1000, 10.00%
Report interval Metrop. Acceptance rate: 48.00%
Overall Metrop. Acceptance rate: 48.00%
-------------------------------------------------
Sampled: 200 of 1000, 20.00%
Report interval Metrop. Acceptance rate: 44.00%
Overall Metrop. Acceptance rate: 46.00%
-------------------------------------------------
Sampled: 300 of 1000, 30.00%
Report interval Metrop. Acceptance rate: 29.00%
Overall Metrop. Acceptance rate: 40.33%
-------------------------------------------------
Sampled: 400 of 1000, 40.00%
Report interval Metrop. Acceptance rate: 31.00%
Overall Metrop. Acceptance rate: 38.00%
-------------------------------------------------
Sampled: 500 of 1000, 50.00%
Report interval Metrop. Acceptance rate: 25.00%
Overall Metrop. Acceptance rate: 35.40%
-------------------------------------------------
Sampled: 600 of 1000, 60.00%
Report interval Metrop. Acceptance rate: 30.00%
Overall Metrop. Acceptance rate: 34.50%
-------------------------------------------------
Sampled: 700 of 1000, 70.00%
Report interval Metrop. Acceptance rate: 28.00%
Overall Metrop. Acceptance rate: 33.57%
-------------------------------------------------
Sampled: 800 of 1000, 80.00%
Report interval Metrop. Acceptance rate: 24.00%
Overall Metrop. Acceptance rate: 32.38%
-------------------------------------------------
Sampled: 900 of 1000, 90.00%
Report interval Metrop. Acceptance rate: 32.00%
Overall Metrop. Acceptance rate: 32.33%
-------------------------------------------------
Sampled: 1000 of 1000, 100.00%
Report interval Metrop. Acceptance rate: 29.00%
Overall Metrop. Acceptance rate: 32.00%
-------------------------------------------------
t2 <- Sys.time()
t2 - t1
Time difference of 1.087918 mins
burn.in <- floor(0.75*n.samples)

cf.sowing.sp.r <- spRecover(cf.sowing.sp, start=burn.in)
Source compiled with OpenMP, posterior sampling is using 1 thread(s).
-------------------------------------------------
    Recovering beta and w
-------------------------------------------------
Sampled: 99 of 251, 39.44%
Sampled: 199 of 251, 79.28%
# Kriging
library(terra)
library(stars)
library(raster)
library(gstat) # Use gstat's idw routine
library(sp)    # Used for the spsample function
library(tmap)
library(geodata)

# India=gadm(country="IND", level=1, path=tempdir())
# plot(India)
# India_State_Boundary=subset(India,India$NAME_1=="Bihar")
# plot(India_State_Boundary)
# library(sf)

#India_State_Boundary=st_as_sf(India_State_Boundary)

#India_State_Boundary_Bihar_sp=as_Spatial(India_State_Boundary_Bihar)

library(spBayes)
India_aoi_sf_dis_sp=as_Spatial(India_aoi_sf_dis)

LDS_small_sp=SpatialPointsDataFrame(cbind(LDS_small$O.largestPlotGPS.Longitude,LDS_small$O.largestPlotGPS.Latitude),data=LDS_small,proj4string=CRS("+proj=longlat +datum=WGS84"))


LDS_small_sp@bbox <- India_aoi_sf_dis_sp@bbox

grd <- as.data.frame(spsample(LDS_small_sp, "regular", n=10000))

names(grd)       <- c("X", "Y")
coordinates(grd) <- c("X", "Y")
gridded(grd)     <- TRUE  # Create SpatialPixel object
fullgrid(grd)    <- TRUE  # Create SpatialGrid object
plot(grd)

India_aoi_sf_dis_sp_poly <- India_aoi_sf_dis_sp@polygons[[1]]@Polygons[[1]]@coords
India_aoi_sf_dis_sp_poly <- as.matrix(India_aoi_sf_dis_sp_poly)

pred.coords <- SpatialPoints(grd)@coords
pred.coords =as.matrix(pred.coords)

pred.covars <- as.matrix(rep(1, nrow(pred.coords)))

cf.sowing.sp.pred <- spPredict(cf.sowing.sp.r,pred.coords=pred.coords,
                                    pred.covars=pred.covars, n.omp.threads=15)
Source compiled with OpenMP, posterior sampling is using 15 thread(s).
-------------------------------------------------
    Point-wise sampling of predicted w
-------------------------------------------------
Sampled: 99 of 251, 39.44%
Sampled: 199 of 251, 79.28%
cf.sowing.sp.pred.pred.mu = apply(cf.sowing.sp.pred$p.y.predictive.samples,1,mean)
cf.sowing.sp.pred.sd = apply(cf.sowing.sp.pred$p.y.predictive.samples,1,sd)

library(MBA)
library(fields)

pred.grid <- as.data.frame(list(pred.coords,pred.mu=cf.sowing.sp.pred.pred.mu,pred.sd=cf.sowing.sp.pred.sd))

coordinates(pred.grid) = c("X", "Y")
gridded(pred.grid) <- TRUE
pred.mu.image <- as.image.SpatialGridDataFrame(pred.grid["pred.mu"])
pred.sd.image <- as.image.SpatialGridDataFrame(pred.grid["pred.sd"])


# predict and probability ------------------------------------------------
cf.sowing.sp.pred.pred.prob_3tons=rowSums(cf.sowing.sp.pred$p.y.predictive.samples>3000)/251
cf.sowing.sp.pred.pred.prob_5tons=rowSums(cf.sowing.sp.pred$p.y.predictive.samples>5000)/251


pred.grid <- as.data.frame(list(pred.coords,pred.mu=cf.sowing.sp.pred.pred.mu,pred.sd=cf.sowing.sp.pred.sd,
                                pred.prob_3tons=cf.sowing.sp.pred.pred.prob_3tons,
                                pred.prob_5tons=cf.sowing.sp.pred.pred.prob_5tons))

coordinates(pred.grid) = c("X", "Y")
gridded(pred.grid) <- TRUE

pred.mu.image <- as.image.SpatialGridDataFrame(pred.grid["pred.mu"])
pred.sd.image <- as.image.SpatialGridDataFrame(pred.grid["pred.sd"])
pred.prob.image_3tons <- as.image.SpatialGridDataFrame(pred.grid["pred.prob_3tons"])
pred.prob.image_5tons<- as.image.SpatialGridDataFrame(pred.grid["pred.prob_5tons"])

# Rastervis
library(rasterVis)
pred.mu=pred.grid["pred.mu"]
pred.mu=raster(pred.mu)
pred.mu=mask(pred.mu,India_aoi_sf_dis_sp)
pred.mu_plot=levelplot(pred.mu,par.settings=RdBuTheme(),contour=TRUE)
pred.mu_plot

# Standard deviation
library(rasterVis)
pred.sd=pred.grid["pred.sd"]
pred.sd=raster(pred.sd)
pred.sd=mask(pred.sd,India_aoi_sf_dis_sp)
pred.sd_plot=levelplot(pred.sd,par.settings=RdBuTheme(),contour=TRUE)
pred.sd_plot

# Probability of 3 tons
pred.prob_3tons=pred.grid["pred.prob_3tons"]
pred.prob_3tons=raster(pred.prob_3tons)
pred.prob_3tons=mask(pred.prob_3tons,India_aoi_sf_dis_sp)
pred.prob_3tons_plot=levelplot(pred.prob_3tons,par.settings=RdBuTheme(),contour=TRUE)
pred.prob_3tons_plot

# Probability of 5 tons
pred.prob_5tons=pred.grid["pred.prob_5tons"]
pred.prob_5tons=raster(pred.prob_5tons)
pred.prob_5tons=mask(pred.prob_5tons,India_aoi_sf_dis_sp)
pred.prob_5tons_plot=levelplot(pred.prob_5tons,par.settings=RdBuTheme(),contour=TRUE)
pred.prob_5tons_plot

4.4.3 Spatial Bayesian Geoadditive Model

library(bamlss)
set.seed(111)
f <- yield_kgperha ~  s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude)

## estimate model.
b <- bamlss(f, data = LDS)
AICc 124684.3 logPost -62379.2 logLik -62338.1 edf 4.0017 eps 0.0571 iteration   1
AICc 124680.3 logPost -62412.1 logLik -62336.1 edf 4.0285 eps 0.0001 iteration   2
AICc 124642.1 logPost -62435.7 logLik -62316.7 edf 4.2724 eps 0.0012 iteration   3
AICc 124399.2 logPost -62396.0 logLik -62193.6 edf 5.9280 eps 0.0079 iteration   4
AICc 123883.3 logPost -62208.8 logLik -61930.0 edf 11.637 eps 0.0198 iteration   5
AICc 123525.4 logPost -62020.8 logLik -61740.3 edf 22.327 eps 0.0173 iteration   6
AICc 123437.4 logPost -61950.1 logLik -61689.0 edf 29.494 eps 0.0105 iteration   7
AICc 123436.0 logPost -61951.6 logLik -61687.5 edf 30.404 eps 0.0014 iteration   8
AICc 123436.0 logPost -61951.6 logLik -61687.5 edf 30.404 eps 0.0000 iteration   9
AICc 123436.0 logPost -61951.6 logLik -61687.5 edf 30.404 eps 0.0000 iteration   9
elapsed time:  2.06sec
Starting the sampler...

|                    |   0% 36.89sec
|*                   |   5% 35.72sec  1.88sec
|**                  |  10% 36.99sec  4.11sec
|***                 |  15% 35.76sec  6.31sec
|****                |  20% 37.76sec  9.44sec
|*****               |  25% 36.57sec 12.19sec
|******              |  30% 31.62sec 13.55sec
|*******             |  35% 29.03sec 15.63sec
|********            |  40% 25.42sec 16.95sec
|*********           |  45% 22.31sec 18.25sec
|**********          |  50% 19.50sec 19.50sec
|***********         |  55% 17.03sec 20.81sec
|************        |  60% 14.70sec 22.05sec
|*************       |  65% 12.58sec 23.36sec
|**************      |  70% 10.60sec 24.74sec
|***************     |  75%  8.67sec 26.02sec
|****************    |  80%  6.83sec 27.31sec
|*****************   |  85%  5.04sec 28.55sec
|******************  |  90%  3.31sec 29.83sec
|******************* |  95%  1.64sec 31.11sec
|********************| 100%  0.00sec 32.41sec
## Plot estimated effects.
plot(b)

## Predict for each latitude and longitude
pred <- expand.grid(O.largestPlotGPS.Longitude = seq(82, 89, length = 100),O.largestPlotGPS.Latitude = seq(24,28, length = 100))
                    
yield_hat <- predict(b,newdata=pred)

yield_hat=as.data.frame(yield_hat)

pred_yield_hat=cbind(pred,yield_hat)

#pred_yield_hat$sigma=NULL
library(terra)

myras <- rast(pred_yield_hat, type="xyz")
plot(myras)

library(raster)
myras2=raster(myras)

library(sf)
India_aoi_sf_dis_sp=as_Spatial(India_aoi_sf_dis)
myras2=mask(myras2,India_aoi_sf_dis_sp)
plot(myras2)

4.4.4 BAMLSS gridded treatment effects: post-processing

library(bamlss)
set.seed(111)
f <- predictions ~  s(O.largestPlotGPS.Longitude,O.largestPlotGPS.Latitude)

## estimate model.
b <- bamlss(f, data = X_cf_sowingtau)
AICc -24865.5 logPost 12472.15 logLik 12463.39 edf 30.476 eps 0.1084 iteration   1
AICc -25039.2 logPost 12558.82 logLik 12550.43 edf 30.694 eps 0.0167 iteration   2
AICc -25041.2 logPost 12559.79 logLik 12551.51 edf 30.750 eps 0.0019 iteration   3
AICc -25041.2 logPost 12559.79 logLik 12551.51 edf 30.755 eps 0.0000 iteration   4
AICc -25041.2 logPost 12559.79 logLik 12551.51 edf 30.755 eps 0.0000 iteration   4
elapsed time:  0.36sec
Starting the sampler...

|                    |   0% 20.23sec
|*                   |   5% 22.23sec  1.17sec
|**                  |  10% 20.97sec  2.33sec
|***                 |  15% 19.32sec  3.41sec
|****                |  20% 18.24sec  4.56sec
|*****               |  25% 17.88sec  5.96sec
|******              |  30% 17.06sec  7.31sec
|*******             |  35% 15.82sec  8.52sec
|********            |  40% 14.82sec  9.88sec
|*********           |  45% 13.75sec 11.25sec
|**********          |  50% 12.66sec 12.66sec
|***********         |  55% 11.45sec 13.99sec
|************        |  60% 10.14sec 15.21sec
|*************       |  65%  8.87sec 16.47sec
|**************      |  70%  7.83sec 18.28sec
|***************     |  75%  6.71sec 20.13sec
|****************    |  80%  5.74sec 22.96sec
|*****************   |  85%  4.57sec 25.92sec
|******************  |  90%  3.21sec 28.88sec
|******************* |  95%  1.65sec 31.42sec
|********************| 100%  0.00sec 34.63sec
## Plot estimated effects.
plot(b)

## Predict for each latitude and longitude
pred <- expand.grid(O.largestPlotGPS.Longitude = seq(82, 89, length = 100),O.largestPlotGPS.Latitude = seq(24,28, length = 100))
                    
yield_hat <- predict(b,newdata=pred)

yield_hat=as.data.frame(yield_hat)

pred_yield_hat=cbind(pred,yield_hat)

#pred_yield_hat$sigma=NULL
library(terra)

myras <- rast(pred_yield_hat, type="xyz")
plot(myras)

library(raster)
myras2=raster(myras)

library(sf)
India_aoi_sf_dis_sp=as_Spatial(India_aoi_sf_dis)
myras2=mask(myras2,India_aoi_sf_dis_sp)
plot(myras2)

5 Areal data

There two important cases in which one may have areal data to use in climate adaptation prioritization. The first case is in which there is plot or farm level plot but that the statistical agency did not provide individual level coordinates. In these cases, the only of relating the individual observations to spatial data is through a higher level spatial level. Secondly, there are cases especially with administrative data in which the data are available only at the aggregated spatial level.

5.1 Using centroid

One can get the centroid of each polygon, e.g., then fit a geostatistical model as in the spatial Bayesian Geostatistical Gaussian Process Model or geoadditive model as above.

5.1.1 Markov Random Field (MRF) Geoadditive Structured and Unstructured Spatial Model

In the case where there are data at individual farm level albeit not have geocoordinates,one can use structured geoadditive model to ascertain the patterns of the explained and unexplained spatial effect.

library(BayesX)
library("R2BayesX")


India_aoi_sp=as_Spatial(India_aoi_sf)

library(rgdal)
#writeOGR(India_aoi_sp,dsn="shp",layer="India_aoi_sp",driver = "ESRI Shapefile")

shpname <- file.path(getwd(), "shp" , "India_aoi_sp")

India_aoi_sp_bnd <- BayesX::shp2bnd(shpname=shpname, regionnames = "NAME_2", check.is.in = F)
Reading map ... finished
Note: map consists originally of 50 polygons
Note: map consists of 47 regions
#write.bnd(India_aoi_sp_bnd, "shp/India_aoi_sp_bnd.bnd", replace = FALSE)

# za <- bayesx(dp2011 ~ code +   sx(ID_2, bs = "mrf", map = India_aoi_sp_bnd) +
#                sx(ID_2, bs = "re"), iter = 1200,  step = 10, data = India_aoi_sp_bnd)

5.2 Other Areal-Point Downscaling Methods

6 Conclusion

7 References